library(dplyr)

## Set the working directory to the folder that contains
## the downloaded Dataverse files (i.e. the parent of "Harris_Data/").

## Note: for this study only the "voters" survey is needed,
## because the non-voters survey did not include
## our "rich get richer" question.

## Load the voters survey into `survey`.
## Dataverse ".tab" exports are tab-delimited, so the separator is given
## explicitly: with read.table()'s default (any run of whitespace) an empty
## field or a value containing a space would shift or break the columns.
survey <- read.table(
  "Harris_Data/Harris 1984 Presidential Election Survey, study no. 842105/harris_s842105_voters_spss.tab",
  header = TRUE,
  sep = "\t"
)

## change everything below 
## note only need voters survey 

# pid: sequential respondent id, one per row
## seq_len() is used instead of 1:nrow() so that a zero-row table would
## give an empty id vector rather than c(1, 0)
survey$pid <- seq_len(nrow(survey))

# study (study number from the data file path, stored as a string)
survey$study <- "842105"

# study year (year)
survey$year <- 1984

# geographic data (urban): set to NA for every respondent
survey$urban <- NA

# geographic data (region): set to NA for every respondent
survey$region <- NA

# respondent head of household (hh): set to NA for every respondent
survey$hh <- NA

# increasing inequality (inequality)
## Q13_2 is converted to character and then relabelled
table(survey[["Q13_2"]])
survey[["inequality"]] <- survey[["Q13_2"]] %>%
  as.character() %>%
  dplyr::recode(
    `1` = "Feel",
    `2` = "Don't Feel",
    `3` = "Not Sure"
  )
table(survey[["inequality"]])

# inequality variable (inequality.variable)
## constant 1 for every respondent in this study
survey[["inequality.variable"]] <- 1

# union (union.self): F6_1, coded 0/1
table(survey$F6_1)
survey$union.self <- survey$F6_1 %>%
  dplyr::recode(`0` = "No", `1` = "Yes")
table(survey$union.self)

# union (union.other): F6_2, coded 0/1
survey$union.other <- survey$F6_2 %>%
  dplyr::recode(`0` = "No", `1` = "Yes")
table(survey$union.other)

# F6_4 == 1 marks "not sure"; look at those rows before overwriting them
table(survey$F6_4)
survey[survey$F6_4 == 1, c("union.self", "union.other", "F6_3")]

# rows flagged in F6_4 get "Not Sure" on both union variables
survey$union.self <- replace(survey$union.self, survey$F6_4 == 1, "Not Sure")
survey$union.other <- replace(survey$union.other, survey$F6_4 == 1, "Not Sure")

# employment (employed) and empl self (employed.self)
## both columns are created in one step and filled with NA
survey[c("employed", "employed.self")] <- NA

# occupation
## F1 codes 1-18 are relabelled; the code-to-label vector is spliced into
## recode() with !!!, which is the same as passing each pair as a named
## argument
table(survey$F1)
survey$occupation <- dplyr::recode(
  survey$F1,
  !!!c(
    `1` = "Professional",
    `2` = "Manager, official",
    `3` = "Proprietor (small business)",
    `4` = "Clerical worker",
    `5` = "Sales worker",
    `6` = "Skilled craftsman, foreman",
    `7` = "Operative, unskilled laborer (except farm)",
    `8` = "Service worker",
    `9` = "Farmer, farm manager, farm laborer",
    `10` = "Student",
    `11` = "Housewife",
    `12` = "Military service",
    `13` = "Unemployed",
    `14` = "Retired",
    `15` = "Welfare",
    `16` = "Disabled",
    `17` = "Other (specify)",
    `18` = "Not sure"
  )
)
table(survey$occupation)

## occ self (occupation.self): set to NA for every respondent
survey[["occupation.self"]] <- NA

# household size (hhsize)
## F2 counts only household members aged 18 and older (younger members
## are not counted); the value is stored as character
table(survey[["F2"]])
survey[["hhsize"]] <- as.character(survey[["F2"]])
table(survey[["hhsize"]])

# education (educ)
## F5 is converted to character and then relabelled (codes 1-9)
table(survey$F5)
survey$educ <- survey$F5 %>%
  as.character() %>%
  dplyr::recode(
    `1` = "No formal schooling",
    `2` = "First through 7th grade",
    `3` = "8th grade",
    `4` = "Some high school",
    `5` = "High school graduate",
    `6` = "Some college",
    `7` = "Two year college graduate",
    `8` = "Four year college graduate",
    `9` = "Postgraduate"
  )
table(survey$educ)

# household income (income)
## F9 income brackets, codes 1-7 (7 = not sure/refused)
table(survey[["F9"]])
survey[["income"]] <- dplyr::recode(
  survey[["F9"]],
  `1` = "Under $7500",
  `2` = "$7,501 to $15,000",
  `3` = "$15,001 to $25,000",
  `4` = "$25,001 to $35,000",
  `5` = "$35,001 to $50,000",
  `6` = "$50,001 and over",
  `7` = "Not sure/refused"
)
table(survey[["income"]])

# age
## F4 age brackets, codes 1-9 (9 = refused)
table(survey$F4)
survey$age <- with(
  survey,
  dplyr::recode(
    F4,
    `1` = "18 to 20",
    `2` = "21 to 24",
    `3` = "25 to 29",
    `4` = "30 to 34",
    `5` = "35 to 39",
    `6` = "40 to 49",
    `7` = "50 to 64",
    `8` = "65 and over",
    `9` = "Refused"
  )
)
table(survey$age)

# race
## race1 relabels F11 (racial group); race2 relabels F10 (Hispanic origin)
table(survey$F11)
survey$race1 <- with(
  survey,
  dplyr::recode(
    F11,
    `1` = "White",
    `2` = "Black",
    `3` = "Oriental/Asian or Pacific Islander",
    `4` = "American Indian or Alaskan native",
    `5` = "Not sure"
  )
)
survey$race2 <- with(
  survey,
  dplyr::recode(
    F10,
    `3` = "Decline/not sure",
    `1` = "Yes, hispanic",
    `2` = "No, not hispanic"
  )
)
table(survey$race1)
table(survey$race2)
sum(is.na(survey$race2))

## Combined race variable. Conditions are checked top to bottom and the
## first match wins:
##   - race1 missing or "Not sure"           -> "Decline/not sure"
##   - any race1 other than "White"          -> "Non-white"
##   - White, race2 missing or declined      -> "Decline/not sure"
##   - White, not hispanic                   -> "Non-Hispanic White"
##   - White, anything else (i.e. hispanic)  -> "Hispanic White"
survey$race <- with(
  survey,
  dplyr::case_when(
    is.na(race1) | race1 == "Not sure" ~ "Decline/not sure",
    race1 != "White" ~ "Non-white",
    is.na(race2) | race2 == "Decline/not sure" ~ "Decline/not sure",
    race2 == "No, not hispanic" ~ "Non-Hispanic White",
    TRUE ~ "Hispanic White"
  )
)
table(survey$race)
table(survey$race[survey$race1 == "White"])

# politics (party)
## Q1B party identification, codes 1-5
table(survey[["Q1B"]])
survey[["party"]] <- dplyr::recode(
  survey[["Q1B"]],
  `1` = "Republican",
  `2` = "Democrat",
  `3` = "Independent",
  `4` = "Other",
  `5` = "Not sure"
)
table(survey[["party"]])

# politics (ideology)
## F8 ideology, codes 1-5
table(survey$F8)
survey$ideology <- survey$F8 %>%
  dplyr::recode(
    `1` = "Conservative",
    `2` = "Middle of the road",
    `3` = "Liberal",
    `4` = "Radical",
    `5` = "Not sure"
  )
table(survey$ideology)

# gender
## S1: 1 = Male, 2 = Female
summary(survey[["S1"]])
survey[["gender"]] <- dplyr::recode(
  survey[["S1"]],
  !!!c(`1` = "Male", `2` = "Female")
)
table(survey[["gender"]])

# religion
## F7 religious affiliation, codes 1-6
table(survey$F7)
survey$religion <- with(
  survey,
  dplyr::recode(
    F7,
    `1` = "Protestant",
    `2` = "Catholic",
    `3` = "Jewish",
    `4` = "Other (write in)",
    `5` = "None",
    `6` = "Not sure"
  )
)
table(survey$religion)

# factuals
## factual1, factual2 and factual3 are created together, all NA
survey[paste0("factual", 1:3)] <- NA

## alienation index
## Q13_1, Q13_3 and Q13_4 share one coding, so the same recode is applied
## to each and the results are stored as dontcare, dontcount and leftout
## (in that order). unname() drops the source column names so the new
## columns are named by the index on the left-hand side.
survey[c("dontcare", "dontcount", "leftout")] <- unname(lapply(
  survey[c("Q13_1", "Q13_3", "Q13_4")],
  dplyr::recode,
  `1` = "Feel",
  `2` = "Don t feel",
  `3` = "Not sure"
))

## question placement
survey[["question_place"]] <- "after party"

# subset
## keep only the variables built above, in this column order
survey_842105voters <- survey[c(
  "pid", "study", "year", "urban", "region", "hh",
  "inequality", "inequality.variable",
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  "hhsize", "educ", "income",
  "age", "race", "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)]


#saveRDS(survey_842105voters, file = "Harris_Data/survey_842105voters.rds")

